rm(list=ls())
library(Hmisc)
library(dplyr)
library(tidyverse)
library(readtext)
#Sidak Y
#04/28/2021
#Purpose: Generate dataset  appeal_caseid appeal_division appeal_file_date appeal_final_date appeal_oral_date appeal_type
#                           sent_court sent_county sent_caseid sent_judge sent_date
#                           defendant_name defendant_appeals 
#                           appeal_decision appeal_decision_date appeal_type appeal_judge1 appeal_judge2 appeal_judge3
#1. Load case summary and generate 5 columns
#2. Load sent files and generate 5 columns
#3. Load defendant file and generate 2 columns
#4. Load disposition and generate 5 columns

# Root folder that holds the appellate court extracts.
# Backslashes must be doubled inside an R string literal: a lone "\C" is an
# unrecognized escape and prevents the whole script from parsing.
main_dir <- "...\\Additional Data\\CA Appellate Courts"

# Full paths of every file below main_dir. Each section below selects its
# input from this vector by a substring of the path.
all_files <- list.files(main_dir, recursive = TRUE, full.names = TRUE)

##################################################
###1. Load case summary and generate 5 columns ###
##################################################
# Case-summary extract: every path in all_files that contains "case".
case_files <- grep("case", all_files, value = TRUE)

# Read the extract and drop rows that are exact duplicates.
case_dat <- unique(read.csv(case_files, fill = TRUE, flush = TRUE))

# Raw CSV header -> analysis column name. Headers that are absent from the
# file are simply skipped.
case_names <- c(
  "Court.of.Appeal.Case."    = "appeal_caseid",
  "Case.Type."               = "appeal_type",
  "Case.Caption."            = "appeal_casename",
  "district"                 = "appeal_district",
  "Division."                = "appeal_division",
  "Filing.Date."             = "appeal_file_date",
  "Completion.Date."         = "appeal_final_date",
  "Oral.Argument.Date.Time." = "appeal_oral_date",
  # NOTE(review): the source header is "Supreme.Court.Case." although the
  # target name says "date" -- confirm what this column actually holds.
  "Supreme.Court.Case."      = "supreme_date"
)
case_known <- colnames(case_dat) %in% names(case_names)
colnames(case_dat)[case_known] <-
  unname(case_names[colnames(case_dat)[case_known]])

# Keep only the analysis columns, in this order.
case_keep <- c(
  "appeal_caseid", "appeal_type", "appeal_district",
  "appeal_division", "appeal_file_date", "appeal_final_date",
  "appeal_casename", "appeal_oral_date", "supreme_date"
)
case_dat <- case_dat[, case_keep]

# One row per appeal: the first occurrence of each case id wins.
case_dat <- case_dat[!duplicated(case_dat$appeal_caseid), ]

################################################
###2. Load sent files and generate 5 columns ###
################################################
# Trial-court (sentencing) extract: every path in all_files containing "sent".
sent_files <- grep("sent", all_files, value = TRUE)

sent_dat <- read.csv(sent_files, fill = TRUE, flush = TRUE)

# Raw CSV header -> analysis column name; "source" carries the appeal case id.
sent_names <- c(
  "Trial.Court.Name."          = "sent_court",
  "County."                    = "sent_county",
  "Trial.Court.Case.Number."   = "sent_caseid",
  "Trial.Court.Judge."         = "sent_judge",
  "Trial.Court.Judgment.Date." = "sent_date",
  "source"                     = "appeal_caseid"
)
sent_known <- colnames(sent_dat) %in% names(sent_names)
colnames(sent_dat)[sent_known] <-
  unname(sent_names[colnames(sent_dat)[sent_known]])

# Drop the column named "X", then exact duplicate rows.
sent_dat$X <- NULL
sent_dat <- unique(sent_dat)

# Two odd observations: rows whose case id is empty or the fragment " Jr.".
odd_ids <- c("", " Jr.")
sent_dat <- sent_dat[!(sent_dat$appeal_caseid %in% odd_ids), , drop = FALSE]

####################################################
###3. Load defendant file and generate 2 columns ###
####################################################
# Produces one row per appeal with defendant_name and defendant_appeals, plus
# defendant_name2..defendant_name17 holding the names of further defendants.

# Party extract: every path in all_files that contains "party".
party_files <- all_files[grep("party", all_files)]

party_dat <- read.csv(party_files, fill = TRUE, flush = TRUE)

# Rename each column by its own name. A single positional assignment over
# `%in% c("X", "source")` would swap the two labels whenever "source" comes
# before "X" in the file.
colnames(party_dat)[colnames(party_dat) == "X"] <- "defendant_num"
colnames(party_dat)[colnames(party_dat) == "source"] <- "appeal_caseid"

# Manual corrections: in these two appeals the defendant is the appellant.
party_dat$defendant_appeals[party_dat$appeal_caseid %in% "A145037"] <- 1
party_dat$defendant_appeals[party_dat$appeal_caseid %in% "A148158"] <- 1

# Drop the currency "party". %in% keeps rows with a missing name intact,
# whereas `!name == "..."` would turn them into all-NA rows.
currency_party <- "$15,500 United States Currency et al. "
party_dat <- party_dat[
  !(party_dat$defendant_name %in% currency_party), ,
  drop = FALSE
]

# Make sure the defendant numbering of every appeal starts at 1.
party_dat <- party_dat %>%
  group_by(appeal_caseid) %>%
  mutate(min_num = min(defendant_num)) %>%
  ungroup()
# if the smallest defendant_num is zero, add one
starts_at_zero <- party_dat$min_num %in% 0
party_dat$defendant_num[starts_at_zero] <-
  party_dat$defendant_num[starts_at_zero] + 1
# if the smallest defendant_num is two, subtract one
starts_at_two <- party_dat$min_num %in% 2
party_dat$defendant_num[starts_at_two] <-
  party_dat$defendant_num[starts_at_two] - 1
party_dat$min_num <- NULL
# Sanity check: distribution of defendant numbers after renumbering.
table(party_dat$defendant_num)

# Reshape wide: defendant k (k = 2..17) gets its own column defendant_name<k>,
# filled on that defendant's row only and NA elsewhere.
wide_cols <- paste0("defendant_name", 2:17)
party_names <- as.character(party_dat$defendant_name)
for (k in 2:17) {
  party_dat[[paste0("defendant_name", k)]] <-
    ifelse(party_dat$defendant_num %in% k, party_names, NA_character_)
}

# Spread each name to every row of its appeal, then keep the row of
# defendant 1. The fill covers ALL created columns (2..17); filling only
# 2..11 would leave defendants 12..17 missing on the kept row.
party_dat <- party_dat %>%
  group_by(appeal_caseid) %>%
  fill(all_of(wide_cols), .direction = "updown") %>%
  ungroup()
party_dat <- subset(party_dat, defendant_num == 1)
party_dat$defendant_num <- NULL

#################################################
###4. Load disposition and generate 5 columns ###
#################################################
# Disposition extract: every path in all_files that contains "disp".
disp_files <- grep("disp", all_files, value = TRUE)
disp_dat <- read.csv(disp_files, fill = TRUE, flush = TRUE)

# Raw CSV header -> analysis column name. "Participants." lands in
# appeal_judge2 for now; it still holds the raw participants text.
disp_names <- c(
  "Description."        = "appeal_decision",
  "Date."               = "appeal_decision_date",
  "Disposition.Type."   = "appeal_decision_detail",
  "Publication.Status." = "appeal_status",
  "Author."             = "appeal_judge1",
  "Participants."       = "appeal_judge2",
  "source"              = "appeal_caseid"
)
disp_known <- colnames(disp_dat) %in% names(disp_names)
colnames(disp_dat)[disp_known] <-
  unname(disp_names[colnames(disp_dat)[disp_known]])

# Drop the citation column and the column named "X".
disp_dat$Case.Citation. <- NULL
disp_dat$X <- NULL

# Remove exact duplicate rows.
disp_dat <- unique(disp_dat)

# Split the participants string into the additional judges.
# Work on a scratch copy (x) and normalise the vote markers first, so that
# "Dissent", "Concur with Opinion" and "Concur" all read "Concur".
disp_dat$x <- disp_dat$appeal_judge2
disp_dat$x <- gsub("Dissent", "Concur", disp_dat$x)
disp_dat$x <- gsub("Concur with Opinion", "Concur", disp_dat$x)

# Split once, for the whole column, on the literal marker "(Concur)".
# x1 = text before the first marker, x2 = text between the first and second
# marker; either is NA when that piece does not exist. Doing this in one
# vectorized call avoids splitting every string twice and also works when
# the table has zero rows (a 1:nrow() loop does not).
judge_parts <- strsplit(as.character(disp_dat$x), split = "(Concur)", fixed = TRUE)
disp_dat$x1 <- vapply(judge_parts, function(p) p[1], character(1))
disp_dat$x2 <- vapply(judge_parts, function(p) p[2], character(1))

# Known x1 values in which two judges were left glued together as
# "<first>  \n<second>" (so the second judge may be incorrect).
# Several entries occur twice, and "iller, Douglas P. ..." is a misspelt twin
# of the final entry; these extras never match anything new and are kept
# as they were.
badlist <- c(
  "Margulies, Sandra Lynn  \nSanchez, Gabriel P.",
  "Ashmann-Gerst, Judith Meisels  \nChavez, Victoria M.",
  "Ashmann-Gerst, Judith Meisels  \nGoodman, Allan J.",
  "Collins, Audrey B.  \nWillhite, Jr., Thomas Lyle",
  "Dillon, Timothy P.  \nFeuer, Gail Ruderman",
  "Egerton, Anne Harwood  \nDhanidina, Halim",
  "Elia, Franklin D.  \nPremo, Eugene M.",
  "Feuer, Gail Ruderman  \nSegal, John",
  "Flier, Madeleine I.  \nBigelow, Tricia A.",
  "Bendix, Helen I.  \nRothschild, Frances",
  "Bigelow, Tricia A.  \nGrimes, Elizabeth A.",
  "Bigelow, Tricia A.  \nWiley Jr., John Shepard",
  "Blease, Coleman A.  \nDuarte, Elena J.",
  "Collins, Audrey B.  \nManella, Nora M.",
  "Collins, Audrey B.  \nWillhite, Jr., Thomas Lyle",
  "Dillon, Timothy P.  \nFeuer, Gail Ruderman",
  "Egerton, Anne Harwood  \nDhanidina, Halim",
  "Elia, Franklin D.  \nPremo, Eugene M.",
  "Feuer, Gail Ruderman  \nSegal, John",
  "Flier, Madeleine I.  \nBigelow, Tricia A.",
  "Flier, Madeleine I.  \nGrimes, Elizabeth A.",
  "Flier, Madeleine I.  \nRubin, Laurence D.",
  "Grimes, Elizabeth A.  \nBigelow, Tricia A.",
  "Hoffstadt, Brian M.  \nAshmann-Gerst, Judith Meisels",
  "Hoffstadt, Brian M.  \nChavez, Victoria M.",
  "Levy, Herbert I.  \nDeSantos, Thomas",
  "Lui, Elwood  \nAshmann-Gerst, Judith Meisels",
  "Margulies, Sandra Lynn  \nSanchez, Gabriel P.",
  "Moor, Carl H.  \nBaker, Lamar W.",
  "Perluss, Dennis M.  \nDillon, Timothy P.",
  "Perluss, Dennis M.  \nFeuer, Gail Ruderman",
  "Perluss, Dennis M.  \nRichardson, Tony L.",
  "Perluss, Dennis M.  \nSegal, John",
  "Perluss, Dennis M.  \nStone, Natalie P.",
  "Perluss, Dennis M.  \nZelon, Laurie D.",
  "Perren, Steven Z.  \nGilbert, Arthur",
  "Perren, Steven Z.  \nYegan, Kenneth R.",
  "Rubin, Laurence D.  \nBigelow, Tricia A.",
  "Segal, John  \nDillon, Timothy P.",
  "Segal, John  \nFeuer, Gail Ruderman",
  "Stone, Natalie P.  \nFeuer, Gail Ruderman",
  "Stone, Natalie P.  \nZelon, Laurie D.",
  "Tangeman, Martin J.  \nGilbert, Arthur",
  "Tangeman, Martin J.  \nPerren, Steven Z.",
  "Weingart, Gregory J.  \nRothschild, Frances",
  "Willhite, Jr., Thomas Lyle  \nManella, Nora M.",
  "Yegan, Kenneth R.  \nGilbert, Arthur",
  "Zelon, Laurie D.  \nFeuer, Gail Ruderman",
  "Zelon, Laurie D.  \nSegal, John",
  "Zelon, Laurie D.  \nStone, Natalie P.",
  "Fybel, Richard David  \nIkola, Raymond J.",
  "Fybel, Richard David  \nO'Leary, Kathleen E.",
  "Goethals, Thomas M.  \nMoore, Eileen C.",
  "Miller, Douglas P.  \nMcKinster, Art W.",
  "iller, Douglas P.  \nRamirez, Manuel A.",
  "Slough, Marsha  \nHollenhorst, Thomas E.",
  "Thompson, David A.  \nDunning, Kim Garlin",
  "Miller, Douglas P.  \nRamirez, Manuel A."
)

# Pre-split every entry at the line break into its two trimmed names.
bad_parts <- strsplit(badlist, split = "\n", fixed = TRUE)
bad_first <- trimws(vapply(bad_parts, function(p) p[1], character(1)))
bad_second <- trimws(vapply(bad_parts, function(p) p[2], character(1)))

# Look every (trimmed) x1 up in the list. For the rows that match, the second
# name goes to x2 and the first name stays in x1. Rows with a missing x1
# never match.
bad_idx <- match(trimws(disp_dat$x1), badlist)
bad_rows <- which(!is.na(bad_idx))
disp_dat$x2[bad_rows] <- bad_second[bad_idx[bad_rows]]
disp_dat$x1[bad_rows] <- bad_first[bad_idx[bad_rows]]
# --- Hand-coded corrections, part 1 ------------------------------------------
# Rows whose x1 holds a full three-judge panel. The key is the raw x1 value
# (NOT trimmed: note the two trailing blanks). The first name becomes the
# author (appeal_judge1), the second stays in x1, the third goes to x2.
x1_fixes <- data.frame(
  key = c(
    "Robie, Ronald B.  \nKrause, Peter A.  \nMurray, Jr., William J.  ",
    "Rubin, Laurence D.  \nTurner, Paul  \nGrimes, Elizabeth A.  ",
    "Nicholson, George W.  \nHoch, Andrea L.  \nDuarte, Elena J.  ",
    "Blease, Coleman A.  \nButz, M. Kathleen  \nHull, Jr., Harry E.  "
  ),
  judge1 = c(
    "Robie, Ronald B.",
    "Rubin, Laurence D.",
    "Nicholson, George W.",
    "Blease, Coleman A."
  ),
  x1 = c(
    "Krause, Peter A.",
    "Turner, Paul",
    "Hoch, Andrea L.",
    "Butz, M. Kathleen"
  ),
  x2 = c(
    "Murray, Jr., William J.",
    "Grimes, Elizabeth A.",
    "Duarte, Elena J.",
    "Hull, Jr., Harry E."
  ),
  stringsAsFactors = FALSE
)
for (k in seq_len(nrow(x1_fixes))) {
  hit <- !is.na(disp_dat$x1) & disp_dat$x1 == x1_fixes$key[k]
  disp_dat$appeal_judge1[hit] <- x1_fixes$judge1[k]
  disp_dat$x2[hit] <- x1_fixes$x2[k]
  disp_dat$x1[hit] <- x1_fixes$x1[k]
}

# --- Hand-coded corrections, part 2 ------------------------------------------
# Rows whose author field (appeal_judge1) holds several names run together.
# Each entry lists the offending value (key) and the replacements; a field
# that is not listed is left untouched.
# NOTE(review): the Epstein, Perluss, O'Leary and Hollenhorst entries move the
# FIRST listed name to x2 and keep the SECOND as author, the reverse of the
# Ashmann-Gerst/Chavez and Bendix entries -- confirm this is intended.
author_fixes <- list(
  list(key = "Grimes, Elizabeth A. Rubin, Laurence D. Bigelow, Tricia A.",
       judge1 = "Grimes, Elizabeth A.",
       x1 = "Rubin, Laurence D.",
       x2 = "Bigelow, Tricia A."),
  list(key = "Ashmann-Gerst, Judith Meisels Chavez, Victoria M.",
       judge1 = "Ashmann-Gerst, Judith Meisels",
       x2 = "Chavez, Victoria M."),
  list(key = "Ashmann-Gerst, Judith Meisels Ashmann-Gerst, Judith Meisels",
       judge1 = "Ashmann-Gerst, Judith Meisels"),
  list(key = "Dondero, Robert L. Dondero, Robert L.",
       judge1 = "Dondero, Robert L."),
  list(key = "Bendix, Helen I. Johnson, Jeffrey W.",
       judge1 = "Bendix, Helen I.",
       x2 = "Johnson, Jeffrey W."),
  list(key = "Epstein, Norman L. Willhite, Jr., Thomas Lyle",
       judge1 = "Willhite, Jr., Thomas Lyle",
       x2 = "Epstein, Norman L."),
  list(key = "Jenkins, Martin J. Jenkins, Martin J.",
       judge1 = "Jenkins, Martin J."),
  list(key = "Siggins, Peter J. Siggins, Peter J.",
       judge1 = "Siggins, Peter J."),
  list(key = "Perluss, Dennis M. Zelon, Laurie D.",
       judge1 = "Zelon, Laurie D.",
       x2 = "Perluss, Dennis M."),
  list(key = "O'Leary, Kathleen E. Fybel, Richard David",
       judge1 = "Fybel, Richard David",
       x2 = "O'Leary, Kathleen E."),
  list(key = "Hollenhorst, Thomas E. McKinster, Art W.",
       judge1 = "McKinster, Art W.",
       x2 = "Hollenhorst, Thomas E.")
)
for (fix in author_fixes) {
  hit <- !is.na(disp_dat$appeal_judge1) & disp_dat$appeal_judge1 == fix[["key"]]
  if (!is.null(fix[["x2"]])) {
    disp_dat$x2[hit] <- fix[["x2"]]
  }
  if (!is.null(fix[["x1"]])) {
    disp_dat$x1[hit] <- fix[["x1"]]
  }
  # The author field is the lookup key, so it is rewritten last.
  disp_dat$appeal_judge1[hit] <- fix[["judge1"]]
}

# Any x2 that contains this judge's name is reduced to just the name.
has_wiseman <- grepl("Wiseman, Rebecca A.", disp_dat$x2)
disp_dat$x2[has_wiseman] <- "Wiseman, Rebecca A."

# Strip leading/trailing whitespace from the three judge fields.
for (judge_col in c("appeal_judge1", "x1", "x2")) {
  disp_dat[[judge_col]] <- trimws(disp_dat[[judge_col]])
}

# Final naming: the raw participants text becomes appeal_judges_desc, and the
# two parsed names take over as appeal_judge2 / appeal_judge3. All three
# renames are resolved against the current names in a single step.
judge_renames <- c(
  appeal_judge2 = "appeal_judges_desc",
  x1            = "appeal_judge2",
  x2            = "appeal_judge3"
)
current_names <- colnames(disp_dat)
to_rename <- current_names %in% names(judge_renames)
colnames(disp_dat)[to_rename] <- unname(judge_renames[current_names[to_rename]])

# The scratch copy of the participants text is no longer needed.
disp_dat$x <- NULL

#remove filler names
filler_names <- c("Pro, Tem", "Judge, Assigned", "Justice, Assigned")
for (judge_col in c("appeal_judge1", "appeal_judge2", "appeal_judge3")) {
  is_filler <- disp_dat[[judge_col]] %in% filler_names
  disp_dat[[judge_col]][is_filler] <- NA
}

#get unique panel_fe
# Panel key = the three judge names sorted and joined with "|", so the same
# three judges give the same key whatever their order. Built from plain
# vectors in one pass: assigning into the data frame cell by cell inside a
# 1:nrow() loop copies the table on every row and misbehaves on zero rows.
panel_j1 <- disp_dat$appeal_judge1
panel_j2 <- disp_dat$appeal_judge2
panel_j3 <- disp_dat$appeal_judge3
disp_dat$panel <- vapply(
  seq_len(nrow(disp_dat)),
  function(j) {
    # sort() drops missing names; incomplete panels are blanked out below.
    paste(sort(c(panel_j1[j], panel_j2[j], panel_j3[j])), collapse = "|")
  },
  character(1)
)

# A panel is only defined when all three judges are known and non-empty.
panel_incomplete <-
  is.na(panel_j1) | panel_j1 == "" |
  is.na(panel_j2) | panel_j2 == "" |
  is.na(panel_j3) | panel_j3 == ""
disp_dat$panel[panel_incomplete] <- NA

# Factor version of the key (one level per distinct panel).
disp_dat$panel_id <- as.factor(disp_dat$panel)
##########################
###5. Combine Datasets ###
##########################
# All merges are full outer joins on appeal_caseid (all = TRUE). Before each
# one, the two setdiff() calls show the case ids present on only one side.

# case summary + sentencing
setdiff(case_dat$appeal_caseid, sent_dat$appeal_caseid)
setdiff(sent_dat$appeal_caseid, case_dat$appeal_caseid)
final_dat <- merge(case_dat, sent_dat, by = "appeal_caseid", all = TRUE)

# + parties
setdiff(party_dat$appeal_caseid, final_dat$appeal_caseid)
setdiff(final_dat$appeal_caseid, party_dat$appeal_caseid)
final_dat <- merge(final_dat, party_dat, by = "appeal_caseid", all = TRUE)

# + dispositions
setdiff(final_dat$appeal_caseid, disp_dat$appeal_caseid)
setdiff(disp_dat$appeal_caseid, final_dat$appeal_caseid)
final_dat <- merge(final_dat, disp_dat, by = "appeal_caseid", all = TRUE)

#remove CV cases (companies or corporations)
# Rows whose appeal_type is missing are dropped here as well, because only
# rows where the comparison is TRUE are kept.
not_cv <- which(final_dat$appeal_type != "CV")
final_dat <- final_dat[not_cv, , drop = FALSE]

# Collapse the free-text disposition into "reversed" / "affirmed" / NA.
# Rules are regular expressions matched against appeal_decision and applied
# from top to bottom; when several rules hit the same row the LAST one wins.
final_dat$decision_cleaned <- NA
decision_rules <- list(
  c("Reversed", "reversed"),
  c("Affirmed", "affirmed"),
  #partial agreement
  c("Remanded to trial court with directions", "reversed"),
  c("Affirmed in part and reversed in part", "reversed"),
  c("One or more cnts aff. & one or more cnts reversed", "reversed"),
  c("Affirmed/part, rev/part, remanded w/direction", "reversed"),
  c("Affirmed in part, modified in part, rev. in part", "reversed"),
  #cases dismissed
  c("Dismissed by opinion", NA),
  c("Petition summarily denied by order", NA),
  c("Case transferred out of district", NA)
)
for (rule in decision_rules) {
  rule_hits <- grepl(rule[1], final_dat$appeal_decision)
  final_dat$decision_cleaned[rule_hits] <- rule[2]
}

# prodefendant: 1 when the outcome favours the defendant, 0 when it does not,
# NA when either the outcome or the appealing side is unknown.
final_dat$prodefendant <- NA
was_reversed <- final_dat$decision_cleaned == "reversed"
was_affirmed <- final_dat$decision_cleaned == "affirmed"
by_defendant <- final_dat$defendant_appeals == 1
by_prosecution <- final_dat$defendant_appeals == 0

#Reversed and defendant has appealed
final_dat$prodefendant[which(was_reversed & by_defendant)] <- 1
final_dat$prodefendant[which(was_affirmed & by_defendant)] <- 0
#Affirmed and prosecution has appealed
final_dat$prodefendant[which(was_affirmed & by_prosecution)] <- 1
final_dat$prodefendant[which(was_reversed & by_prosecution)] <- 0

describe(final_dat$prodefendant)

# Normalise sent_date so month and day carry no leading zero
# (e.g. "01/05/2016" becomes "1/5/2016"); empty or missing dates become NA.
sent_raw <- trimws(final_dat$sent_date)
sent_clean <- stringr::str_remove(sent_raw, "^0+")         # leading zero(s) of the string
sent_clean <- gsub("/0", "/", sent_clean, fixed = TRUE)    # a zero right after a slash
sent_clean[is.na(sent_raw) | sent_raw == ""] <- NA
final_dat$sent_date <- sent_clean


# Correct division: overwrite it with the first character of the case id.
final_dat$appeal_division <- substr(final_dat$appeal_caseid, 1, 1)

#remove duplicates
# First exact duplicate rows, then repeated case ids (first row wins).
final_dat <- unique(final_dat)
repeated_id <- duplicated(final_dat$appeal_caseid) #three duplicates
final_dat <- final_dat[!repeated_id, ]
colnames(final_dat)[colnames(final_dat) == "appeal_decision_detail"] <- "appeal_detail"

# time from petition announcement
# edate  = sentencing date parsed as month/day/year
# time_a = days from the reference date 06/06/2016 to edate
#          (negative when the sentence came first)
announce_date <- as.Date("06/06/2016", format = "%m/%d/%Y")
final_dat$edate <- as.Date(final_dat$sent_date, format = "%m/%d/%Y")
final_dat$time_a <- as.numeric(final_dat$edate - announce_date)

# if length of string column is longer than 128 then take first 128 characters (for stata)
# Work column by column. apply() on a data frame first converts it to a
# character matrix, which would turn every numeric, Date and factor column
# (prodefendant, time_a, edate, panel_id, ...) into padded text in the export.
df <- as.data.frame(final_dat)
df[] <- lapply(df, function(column) {
  if (is.character(column)) substr(column, 1, 128) else column
})

# Drop leftover index columns, if present.
df$X.1 <- NULL
df$X.1.x <- NULL
df$X.1.y <- NULL

#remove irrelevant columns
# Every column whose name contains "_name" is dropped. A logical mask is used
# so that nothing is removed when no column matches; negating an empty
# grep() result would select zero columns.
is_name_col <- grepl("_name", colnames(df))
df <- df[, !is_name_col, drop = FALSE]

# Write the Stata file into the current working directory.
haven::write_dta(df, "recall_data_appellate.dta")

# Free the intermediate tables; final_dat and df stay available.
rm(case_dat, disp_dat, party_dat, sent_dat)


